# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from gushici.items import BiqugeItem
import re

class BiqugeSpider(CrawlSpider):
    """Crawl chapter pages linked from a biquyun.com search result.

    The spider starts from a single search URL, follows every link found
    inside the ``<div id="list">`` element of that response, and turns each
    followed page into a ``BiqugeItem`` holding the chapter title and text.
    """

    name = 'biquge'
    allowed_domains = ['www.biquyun.com']
    # The search key is a percent-encoded, non-UTF-8 byte sequence; keep it
    # byte-for-byte, the site expects exactly this encoding.
    start_urls = ['http://www.biquyun.com/modules/article/soshu.php?searchkey=+%C8%E5%B5%C0%D6%C1%CA%A5']

    rules = (
        # The attribute value must be quoted: an unquoted ``list`` is read by
        # XPath as a child-element node-set, so ``@id=list`` never matches and
        # no links are extracted. ``allow`` is left at its default, which
        # already accepts every URL.
        Rule(
            LinkExtractor(restrict_xpaths='//div[@id="list"]//a'),
            callback='parse_item',
        ),
    )

    # NOTE: do not define ``parse`` on a CrawlSpider. The base class uses it
    # to apply ``rules``; overriding it stops the rules from being processed
    # on older Scrapy versions. ``parse_start_url`` is the supported hook for
    # handling the responses of ``start_urls``.
    def parse_start_url(self, response, **kwargs):
        """Log the start page that was fetched and produce no items.

        Args:
            response: The response for one of ``start_urls``.
            **kwargs: Extra callback arguments passed by newer Scrapy
                versions; unused.

        Returns:
            An empty list, so only the rules generate further requests.
        """
        self.logger.debug('start page fetched: %s', response.url)
        return []

    def parse_item(self, response):
        """Build a ``BiqugeItem`` from a page reached through the rules.

        Args:
            response: The response of a followed link.

        Returns:
            A ``BiqugeItem`` whose ``title`` is the first text node of the
            ``<h1>`` inside ``div.bookname`` (``None`` when absent) and whose
            ``content`` is a list of extracted strings for ``div#content``.
        """
        self.logger.debug('parsing chapter page: %s', response.url)
        item = BiqugeItem()
        item['title'] = response.xpath(
            '//div[@class="bookname"]/h1/text()'
        ).extract_first()
        # ``string(...)`` collapses the node to a single string, so this is a
        # one-element list. It is kept as a list because code outside this
        # file (e.g. item pipelines) may rely on that shape.
        item['content'] = response.xpath(
            'string(//div[@id="content"])'
        ).extract()
        return item
